#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time   :  2019/7/4 10:50
@Author :  geqh
@file   :  keyword_EN.py
"""
from utils.log import *
from utils.confPaser import *
import os
from keywordEN import RAKE
import operator


def keywordEN(text, keyphrasenum=4):
    """Extract the highest-weighted English key phrases from ``text``.

    The text is split into sentences, candidate phrases are cut out around
    the stop words listed in ``conf/stopword_en.txt`` (resolved relative to
    ``project_path``), each candidate is scored by the ``RAKE`` module, and
    the candidates are ranked by descending score.

    :param text: input text passed to ``RAKE.split_sentences``.
    :param keyphrasenum: upper bound of the slice taken from the ranked
        candidates, i.e. the maximum number of phrases returned (default 4).
    :return: list of ``{"phrase": ..., "weight": ...}`` dicts ordered from
        highest to lowest weight. The list is also written to the log at
        INFO level before being returned.
    """
    stopword_file = os.path.join(project_path, "conf/stopword_en.txt")
    sentences = RAKE.split_sentences(text)
    stop_regex = RAKE.build_stop_word_regex(stopword_file)

    # Candidate phrases are the word runs left between stop words.
    candidates = RAKE.generate_candidate_keywords(sentences, stop_regex)

    # Per-word scores, then per-phrase scores derived from them.
    word_scores = RAKE.calculate_word_scores(candidates)
    phrase_scores = RAKE.generate_candidate_keyword_scores(candidates, word_scores)

    # Rank (phrase, score) pairs by score, best first; the sort is stable,
    # so equally scored phrases keep their original relative order.
    ranked = list(phrase_scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    keyphrase = [
        {"phrase": phrase, "weight": weight}
        for phrase, weight in ranked[:keyphrasenum]
    ]
    logger.info(keyphrase)
    return keyphrase
